from lxml import etree

# Sample HTML fragment used by getHTML() below.
# The last <li> line carries an inline note (written in Chinese) that reads
# "note: a closing </li> tag is missing here". The note sits inside the string
# literal, so it is part of the markup that gets parsed, not a Python comment.
text = '''
<div>
    <ul>
         <li class="item-0"><a href="link1.html">first item</a></li>
         <li class="item-1"><a href="link2.html">second item</a></li>
         <li class="item-inactive"><a href="link3.html">third item</a></li>
         <li class="item-1"><a href="link4.html">fourth item</a></li>
         <li class="item-0"><a href="link5.html">fifth item</a> # 注意，此处缺少一个 </li> 闭合标签
     </ul>
 </div>
'''

def getHTML():
    """Parse the module-level ``text`` snippet and print its link targets.

    Two lines are printed: the type of the root object returned by
    ``etree.HTML`` and the list of ``href`` attribute values found at or
    below every ``<a>`` that is a direct child of an ``<li>``.
    """
    root = etree.HTML(text)
    print(type(root))
    # '//@href' (descendant-or-self) is kept as written in the original query.
    print(root.xpath('//li/a//@href'))

def getparse():
    """Print every ``<li class="item-1">`` element in ``./collection/hello.html``.

    The file is read with an explicit ``etree.HTMLParser``. ``etree.parse``
    otherwise defaults to the strict XML parser, which raises
    ``XMLSyntaxError`` on ordinary HTML (unclosed tags, void elements such
    as ``<br>``, entities such as ``&nbsp;``). The HTML parser recovers
    from such markup instead of failing.

    Raises:
        OSError: raised by lxml when the file cannot be opened or read.
    """
    tree = etree.parse('./collection/hello.html', etree.HTMLParser())
    # Exact match on the attribute value: only class="item-1" qualifies.
    items = tree.xpath('//li[@class="item-1"]')
    print(items)

# Script entry: run the in-memory demo. getparse() is defined above but is
# not called anywhere in the code shown here.
getHTML()
